Column sum and sparsity

## [1]  3005 16450

Least sparse genes (most non-zeros)

Most sparse genes (most zeros)

Medium sparse genes

Lingxue selected genes

Most sparse genes

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Analysis using most sparse genes

Removing cells with almost no counts anywhere

## [1] 2326  700
##                       [,1] [,2]
## astrocytes       0.2321429  224
## endothelial      0.1574468  235
## interneurons     0.9931034  290
## microglia        0.1428571   98
## oligodendrocytes 0.7365854  820
## pyramidal CA1    0.9957401  939
## pyramidal SS     0.9924812  399
par(mfrow = c(1,2))
plot(table(dat3))
plot(table(dat3), ylim = c(0, 100))

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Modeling approximation

dat_approx <- res_svd$u[,1:2] %*% diag(res_svd$d[1:2]) %*% t(res_svd$v[,1:2])
residual <- dat3 - dat_approx

set.seed(10)
idx <- sample(prod(dim(dat3)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
     ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)

plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)

Thresholding matrix

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Modeling approximation

dat_approx <- res_svd$u[,1:2] %*% diag(res_svd$d[1:2]) %*% t(res_svd$v[,1:2])
residual <- dat4 - dat_approx

set.seed(10)
idx <- sample(prod(dim(dat4)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
     ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)

plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)

Column ANOVA

Largest

Smallest

Medium

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Most differentiated

##      interneurons pyramidal SS pyramidal CA1 oligodendrocytes  microglia
## 0%     0.00000000   0.00000000    0.00000000      0.000000000 0.00000000
## 25%    0.01034483   0.01253133    0.01064963      0.002439024 0.00000000
## 50%    0.03103448   0.02756892    0.03194888      0.008536585 0.00000000
## 75%    0.06551724   0.05263158    0.06922258      0.019512195 0.02040816
## 100%   0.91724138   0.69172932    0.83493078      0.841463415 0.97959184
##      endothelial  astrocytes
## 0%   0.000000000 0.000000000
## 25%  0.000000000 0.000000000
## 50%  0.004255319 0.008928571
## 75%  0.017021277 0.017857143
## 100% 0.672340426 0.785714286

Most distinguished for each cell type

Least distinguished

Medium distinguished

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Analysis using most differentiated

Removing cells with almost no counts anywhere

## [1] 3005  700
par(mfrow = c(1,2))
plot(table(dat3))
plot(table(dat3), ylim = c(0, 100))

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Modeling approximation

dat_approx <- res_svd$u[,1:8] %*% diag(res_svd$d[1:8]) %*% t(res_svd$v[,1:8])
residual <- dat3 - dat_approx

set.seed(10)
idx <- sample(prod(dim(dat3)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
     ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)

plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 500, by = 1)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)

Thresholding matrix

Singular values

Left eigenvectors (Cells)

Right eigenvectors (Genes)

Modeling approximation

dat_approx <- res_svd$u[,1:7] %*% diag(res_svd$d[1:7]) %*% t(res_svd$v[,1:7])
residual <- dat4 - dat_approx

set.seed(10)
idx <- sample(prod(dim(dat4)), 5e5)
plot(as.vector(dat_approx)[idx], as.vector(residual)[idx], pch = 16, xlab = "mean",
     ylab = "residual", col = rgb(0,0,0,0.1))
lines(c(-1e4, 1e4), rep(0,2), col = "red", lwd = 2)

plot(as.numeric(dat_approx)[idx], as.numeric(dat)[idx], col = rgb(0,0,0,0.1))
mean_val <- seq(0, 50, by = 0.25)
lines(mean_val, mean_val, col = "red", lwd = 2)
sd_top <- mean_val + 3*sqrt(mean_val)
sd_bot <- mean_val - 3*sqrt(mean_val)
lines(mean_val, sd_top, col = "red", lwd = 2, lty = 2)
lines(mean_val, sd_bot, col = "red", lwd = 2, lty = 2)